/*
 * Copyright (c) 2022 HiSilicon (Shanghai) Technologies CO., LIMITED.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <sys/prctl.h>

#include "sample_comm_nnie.h"
#include "sample_media_ai.h"
#include "ai_infer_process.h"
#include "yolov2_face_detect.h"
#include "vgs_img.h"
#include "ive_img.h"
#include "misc_util.h"
#include "hisignalling.h"
#include "audio_aac_adp.h"
#include "base_interface.h"
#include "osd_img.h"
#include "posix_help.h"

#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif
#endif /* End of #ifdef __cplusplus */

#define HAND_FRM_WIDTH     640
#define HAND_FRM_HEIGHT    384
#define DETECT_OBJ_MAX     32
#define RET_NUM_MAX        4
#define DRAW_RETC_THICK    2    // Draw the width of the line
#define WIDTH_LIMIT        32
#define HEIGHT_LIMIT       32
#define IMAGE_WIDTH        224  // The resolution of the model IMAGE sent to the classification is 224*224
#define IMAGE_HEIGHT       224
#define MODEL_FILE_GESTURE    "/userdata/models/face_classify/face3_resnet18_classify_inst.wk" // darknet framework wk model
#define THRESH_MIN          30 
/****************************audio*********************/
#define BUFFER_SIZE           16    // audio buffer size
#define AUDIO_CASE_TWO     2
#define AUDIO_SCORE        40       // Confidence can be configured by yourself
#define AUDIO_FRAME        14       // Counter threshold: process one result every 15th frame, can be configured by yourself
#define MULTIPLE_OF_EXPANSION 100   // Multiple of expansion
#define SCORE_MAX           4096    // The score corresponding to the maximum probability
#define UNKOWN_WASTE          20    // Unknown waste class id

#define NORM_BUF_SIZE       256 // normal buf size

static int g_num = 108;
static int g_count = 0;

static HI_BOOL g_bAudioProcessStopSignal = HI_FALSE;
static pthread_t g_audioProcessThread = 0;
static OsdSet* g_osdsTrash = NULL;
static HI_S32 g_osd0Trash = -1;
#define MIN_OF_BOX            16    // min of box
#define MAX_OF_BOX            240   // max of box
/****************************audio************************/
static int biggestBoxIndex;
static IVE_IMAGE_S img;
static DetectObjInfo objs[DETECT_OBJ_MAX] = {0};
static RectBox boxs[DETECT_OBJ_MAX] = {0};
static RectBox objBoxs[DETECT_OBJ_MAX] = {0};
static RectBox remainingBoxs[DETECT_OBJ_MAX] = {0};
static RectBox cnnBoxs[DETECT_OBJ_MAX] = {0}; // Store the results of the classification network
static RecogNumInfo numInfo[RET_NUM_MAX] = {0};
static IVE_IMAGE_S imgIn;
static IVE_IMAGE_S imgDst;
static VIDEO_FRAME_INFO_S frmIn;
static VIDEO_FRAME_INFO_S frmDst;
int uartFd = 0;

static SkPair g_stmChn = {
    .in = -1,
    .out = -1
};

static HI_VOID PlayAudio(const RecogNumInfo items)
{
    if  (g_count < AUDIO_FRAME) {
        g_count++;
        return;
    }

    const RecogNumInfo *item = &items;
    uint32_t score = item->score * MULTIPLE_OF_EXPANSION / SCORE_MAX;
    if ((score > AUDIO_SCORE && score < 90) && (g_num != item->num)) {
        g_num = item->num;
    
        AudioTest(g_num, -1);
        
    }
    g_count = 0;
}
/*
 * Audio worker thread entry.
 * Blocks on the socket-pair read end for RecogNumInfo messages pushed by the
 * inference path and forwards each complete message to PlayAudio. Exits when
 * g_bAudioProcessStopSignal is raised (the read is unblocked by closing the
 * pair during unload).
 */
static HI_VOID* GetAudioFileName(HI_VOID* arg)
{
    RecogNumInfo msg = {0};

    while (!g_bAudioProcessStopSignal) {
        /* Only act on a complete message; short/failed reads are ignored. */
        if (FdReadMsg(g_stmChn.in, &msg, sizeof(RecogNumInfo)) == sizeof(RecogNumInfo)) {
            PlayAudio(msg);
        }
    }

    return NULL;
}

/*
 * Load the face-detect + ResNet-classify pipeline.
 * Creates the classification network (stored in *model as an opaque handle),
 * initializes the detection model, opens the UART, and — when audio is
 * enabled in the config — creates the socket pair and audio worker thread.
 * Returns 0 on success, a negative/CnnCreate error or pthread errno on failure.
 */
HI_S32 Yolo2FaceDetectResnetClassifyLoad(uintptr_t* model)
{
    SAMPLE_SVP_NNIE_CFG_S *self = NULL;
    HI_S32 ret;
    HI_CHAR audioThreadName[BUFFER_SIZE] = {0};

    ret = CnnCreate(&self, MODEL_FILE_GESTURE);
    *model = ret < 0 ? 0 : (uintptr_t)self;
    if (ret < 0) {
        /* Without the classification model the pipeline cannot run; fail fast
         * instead of continuing with a NULL handle as before. */
        SAMPLE_PRT("CnnCreate FAIL, ret=%#x\n", ret);
        return ret;
    }
    FaceDetectInit(); // Initialize the face detection model
    SAMPLE_PRT("Load face detect claasify model success\n");

    /* uart open init; UART failure is non-fatal for the vision pipeline */
    uartFd = UartOpenInit();
    if (uartFd < 0) {
        printf("uart1 open failed\r\n");
    } else {
        printf("uart1 open successed\r\n");
    }

    /* audio init: socket pair + playback worker thread */
    if (GetCfgBool("audio_player:support_audio", true)) {
        ret = SkPairCreate(&g_stmChn);
        HI_ASSERT(ret == 0);
        if (snprintf_s(audioThreadName, BUFFER_SIZE, BUFFER_SIZE - 1, "AudioProcess") < 0) {
            HI_ASSERT(0);
        }
        // NOTE(review): prctl(PR_SET_NAME) renames the CALLING thread, not the
        // worker created below — confirm whether the worker was meant to be named.
        prctl(PR_SET_NAME, (unsigned long)audioThreadName, 0, 0, 0);
        ret = pthread_create(&g_audioProcessThread, NULL, GetAudioFileName, NULL);
        if (ret != 0) {
            SAMPLE_PRT("face audio proccess thread creat fail:%s\n", strerror(ret));
            SkPairDestroy(&g_stmChn); /* don't leak the pair when the thread failed */
            return ret;
        }
    }
    return ret;
}

/*
 * Tear down the face-detect + classify pipeline.
 * Destroys the classification network, deinitializes the detection model,
 * clears the OSD set, stops and joins the audio worker (when enabled) and
 * switches the indicator light off.
 */
HI_S32 Yolo2FaceDetectResnetClassifyUnload(uintptr_t model)
{
    CnnDestroy((SAMPLE_SVP_NNIE_CFG_S*)model);
    FaceDetectExit(); // Uninitialize the face detection model
    OsdsClear(g_osdsTrash);

    /* destroy audio: raise the stop flag BEFORE closing the socket pair so
     * the worker observes shutdown the moment FdReadMsg is unblocked by the
     * closed fd, then reap the thread. */
    if (GetCfgBool("audio_player:support_audio", true)) {
        g_bAudioProcessStopSignal = HI_TRUE;
        SkPairDestroy(&g_stmChn);
        SAMPLE_PRT("SkPairDestroy success\n");
        pthread_join(g_audioProcessThread, NULL);
        g_audioProcessThread = 0;
    }
    SAMPLE_PRT("Unload face detect claasify model success\n");
    system("sh /userdata/light_off.sh");
    return 0;
}

/*
 * Return the index of the box with the largest area in boxs[0..detectNum-1],
 * or -1 when detectNum is 0 or the winning box is degenerate (width or
 * height of exactly 1 pixel, i.e. xmax == xmin or ymax == ymin).
 */
static HI_S32 GetBiggestFaceIndex(RectBox boxs[], int detectNum)
{
    /* Guard first: the old code read boxs[0] before checking detectNum. */
    if (detectNum <= 0) {
        return -1;
    }

    HI_S32 bestIdx = 0; /* renamed: no longer shadows file-scope biggestBoxIndex */
    HI_S32 bestArea = (boxs[0].xmax - boxs[0].xmin + 1) * (boxs[0].ymax - boxs[0].ymin + 1);

    for (HI_S32 i = 1; i < detectNum; i++) {
        HI_S32 boxWidth = boxs[i].xmax - boxs[i].xmin + 1;
        HI_S32 boxHeight = boxs[i].ymax - boxs[i].ymin + 1;
        HI_S32 boxArea = boxWidth * boxHeight;
        if (boxArea > bestArea) {
            bestArea = boxArea;
            bestIdx = i;
        }
    }

    /* Reject a 1-pixel-wide/tall winner, matching the original contract. */
    HI_S32 bestWidth = boxs[bestIdx].xmax - boxs[bestIdx].xmin + 1;
    HI_S32 bestHeight = boxs[bestIdx].ymax - boxs[bestIdx].ymin + 1;
    if ((bestWidth == 1) || (bestHeight == 1)) {
        return -1;
    }

    return bestIdx;
}

/* hand gesture recognition info */
static void FaceDetectFlag(const RecogNumInfo resBuf)
{

    HI_CHAR *gestureName = NULL;
    uint32_t score_xxq;
    uint32_t score_djy;
    uint32_t score_stranger;
    
    switch (resBuf.num) {

	case 0u:
     score_xxq = resBuf.score * HI_PER_BASE / SCORE_MAX;
     if (score_xxq>30 && score_xxq<85) {
		gestureName = "gesture xxq";
		system("sh /userdata/light_on.sh");
        UartSendRead(uartFd, FistGesture); // duanjiayu
        SAMPLE_PRT("----gesture score----:%d\n", score_xxq);
		SAMPLE_PRT("----gesture name----:%s\n", gestureName);
        break;
        }
        else{
            gestureName = "gesture others";
            SAMPLE_PRT("----gesture score----:%d\n", score_xxq);
            SAMPLE_PRT("----gesture score----:%d\n", gestureName);
            break;
        }
		
	case 1u:
     score_djy = resBuf.score * HI_PER_BASE / SCORE_MAX;
     if (score_djy>30 && score_djy<85) {
		gestureName = "gesture djy";
        system("sh /userdata/light_on.sh");
		
		UartSendRead(uartFd, ForefingerGesture); // xxq
        SAMPLE_PRT("----gesture score----:%d\n", score_djy);
		SAMPLE_PRT("----gesture name----:%s\n", gestureName);
        break;
     }
     else{
        gestureName = "gesture others";
        SAMPLE_PRT("----gesture score----:%d\n", score_djy);
        SAMPLE_PRT("----gesture score----:%d\n", gestureName);
        break;
     }
		
	case 2u:
        score_stranger = resBuf.score * HI_PER_BASE / SCORE_MAX;
        if (score_stranger>30 && score_stranger<85) {
        gestureName = "gesture Stranger";
		system("sh /userdata/light_on.sh");
		UartSendRead(uartFd, OkGesture); // background
        uint32_t score_stranger = resBuf.score * HI_PER_BASE / SCORE_MAX;
        SAMPLE_PRT("----gesture score----:%d\n", score_stranger);
		SAMPLE_PRT("----gesture name----:%s\n", gestureName);
		break;
        }
        else
        {
        gestureName = "gesture others";
        SAMPLE_PRT("----gesture score----:%d\n", score_stranger);
		SAMPLE_PRT("----gesture name----:%s\n", gestureName);
		break;
        }

	default:
		gestureName = "gesture others";
        system("sh /userdata/light_off.sh");
		UartSendRead(uartFd, InvalidGesture); // others
		SAMPLE_PRT("----gesture name----:%s\n", gestureName);
		break;


    }

    SAMPLE_PRT("=====face gesture success=====\n");
}

/*
 * Per-frame inference: run the YOLO2 face detector on srcFrm, draw the
 * biggest face green and the remaining faces red on dstFrm, crop the biggest
 * face, resize it to IMAGE_WIDTH x IMAGE_HEIGHT and classify it with the
 * ResNet network, then dispatch the top result (gesture action + optional
 * audio message). Returns HI_SUCCESS or the first failing step's code.
 */
HI_S32 Yolo2FaceDetectResnetClassifyCal(uintptr_t model, VIDEO_FRAME_INFO_S *srcFrm, VIDEO_FRAME_INFO_S *dstFrm)
{
    SAMPLE_SVP_NNIE_CFG_S *self = (SAMPLE_SVP_NNIE_CFG_S*)model;
    HI_S32 resLen = 0;
    int objNum;
    int ret;
    int num = 0;

    ret = FrmToOrigImg(srcFrm, &img);
    SAMPLE_CHECK_EXPR_RET(ret != HI_SUCCESS, ret, "face detect for YUV Frm to Img FAIL, ret=%#x\n", ret);

    objNum = FaceDetectCal(&img, objs); // Send IMG to the detection net for reasoning
    for (int i = 0; i < objNum; i++) {
        cnnBoxs[i] = objs[i].box;
        RectBox *box = &objs[i].box;
        /* Map detector coordinates onto the destination frame resolution. */
        RectBoxTran(box, HAND_FRM_WIDTH, HAND_FRM_HEIGHT,
            dstFrm->stVFrame.u32Width, dstFrm->stVFrame.u32Height);
        SAMPLE_PRT("yolo2_out: {%d, %d, %d, %d}\n", box->xmin, box->ymin, box->xmax, box->ymax);
        boxs[i] = *box;
    }
    biggestBoxIndex = GetBiggestFaceIndex(boxs, objNum);
    SAMPLE_PRT("biggestBoxIndex:%d, objNum:%d\n", biggestBoxIndex, objNum);

    if (biggestBoxIndex < 0) {
        return ret; /* nothing detected this frame */
    }

    /* Biggest face in green. */
    objBoxs[0] = boxs[biggestBoxIndex];
    MppFrmDrawRects(dstFrm, objBoxs, 1, RGB888_GREEN, DRAW_RETC_THICK);

    /* Collect the other faces, then draw them once with the real count.
     * Fix: the old code drew inside the loop with count objNum - 1, which
     * rendered stale entries of remainingBoxs on every iteration. */
    for (int j = 0; j < objNum; j++) {
        if (j != biggestBoxIndex) {
            remainingBoxs[num++] = boxs[j];
        }
    }
    if (num > 0) {
        MppFrmDrawRects(dstFrm, remainingBoxs, num, RGB888_RED, DRAW_RETC_THICK);
    }

    /* Crop the biggest face (in source coordinates) for classification. */
    ret = ImgYuvCrop(&img, &imgIn, &cnnBoxs[biggestBoxIndex]);
    SAMPLE_CHECK_EXPR_RET(ret < 0, ret, "ImgYuvCrop FAIL, ret=%#x\n", ret);

    if ((imgIn.u32Width >= WIDTH_LIMIT) && (imgIn.u32Height >= HEIGHT_LIMIT)) {
        COMPRESS_MODE_E enCompressMode = srcFrm->stVFrame.enCompressMode;

        ret = OrigImgToFrm(&imgIn, &frmIn);
        if (ret != HI_SUCCESS) {
            SAMPLE_PRT("OrigImgToFrm FAIL, ret=%#x\n", ret);
            goto crop_free;
        }
        frmIn.stVFrame.enCompressMode = enCompressMode;
        SAMPLE_PRT("crop u32Width = %d, img.u32Height = %d\n", imgIn.u32Width, imgIn.u32Height);

        ret = MppFrmResize(&frmIn, &frmDst, IMAGE_WIDTH, IMAGE_HEIGHT);
        if (ret != HI_SUCCESS) {
            SAMPLE_PRT("MppFrmResize FAIL, ret=%#x\n", ret);
            goto crop_free;
        }

        ret = FrmToOrigImg(&frmDst, &imgDst);
        if (ret != HI_SUCCESS) {
            SAMPLE_PRT("FrmToOrigImg for resized frame FAIL, ret=%#x\n", ret);
            goto resize_free;
        }

        ret = CnnCalU8c1Img(self, &imgDst, numInfo, sizeof(numInfo) / sizeof(numInfo[0]), &resLen);
        if (ret < 0) {
            /* Fix: the old SAMPLE_CHECK_EXPR_RET here leaked imgIn/frmDst. */
            SAMPLE_PRT("CnnCalU8c1Img FAIL, ret=%#x\n", ret);
            goto resize_free;
        }
        HI_ASSERT(resLen <= sizeof(numInfo) / sizeof(numInfo[0]));
        FaceDetectFlag(numInfo[0]);

        /* Forward the top result to the audio worker (best effort). */
        if (GetCfgBool("audio_player:support_audio", true)) {
            if (FdWriteMsg(g_stmChn.out, &numInfo[0], sizeof(RecogNumInfo)) != sizeof(RecogNumInfo)) {
                SAMPLE_PRT("FdWriteMsg FAIL\n");
            }
        }

resize_free:
        MppFrmDestroy(&frmDst);
    }

crop_free:
    IveImgDestroy(&imgIn);
    return ret;
}

#ifdef __cplusplus
#if __cplusplus
}
#endif
#endif /* End of #ifdef __cplusplus */

